*****************************************************************************
** REPLICATION DO-FILE FOR ARTICLE:                                        **
** MEMORY EFFECTS IN REPEATED SURVEY QUESTIONS - REVIVING THE EMPIRICAL    **
** INVESTIGATION OF THE INDEPENDENT MEASUREMENTS ASSUMPTION                **
** AUTHORS: HANNAH SCHWARZ, MELANIE REVILLA, WIEBKE WEBER                  ** 
*****************************************************************************

* Folder that contains the replication dataset. Defaults to the current working
* directory; replace "." with the path of the data folder if it lives elsewhere.
* (With an empty string the path below would start with "/" and point to the
* root of the file system.)
global source "."

use "$source/Dataset Schwarz Revilla Weber.dta", clear

******************************************************
** 0) DATA PREPARATION                              **
******************************************************

* female: 1 if either Q197 or Q385 equals 2, 0 in every other case
* (respondents without an answer on both items therefore end up as 0)
generate female = (Q197==2 | Q385==2)
tabulate female, missing

* age: 2018 minus the year stored in Q199/Q387 (strings converted to numbers);
* the value from Q387 takes precedence whenever it is available
destring Q199 Q387, generate(year1 year2)
generate age = 2018 - cond(year2!=., year2, year1)
tabulate age, missing

* university: 1 if Q203 or Q391 equals 7, otherwise 0
generate university = (Q203==7 | Q391==7)
tabulate university, missing

* highschool: 1 if Q203 or Q391 equals 4, otherwise 0
generate highschool = (Q203==4 | Q391==4)
tabulate highschool, missing

* survexp: 1 if Q215 or Q403 equals 1, 0 otherwise;
* left missing when both Q215 and Q403 are system-missing
generate survexp = (Q215==1 | Q403==1) if Q215!=. | Q403!=.
tabulate survexp, missing

* surv4plus: 1 if Q217 or Q405 is above 4 (meaning more than 3 times, "3 veces"),
* 0 if the answered item is 4 or below or if the respondent has no survey
* experience (survexp==0), missing if Q215 and Q403 are both missing.
*
* Stata treats missing values as larger than any number, so every ">4" test has
* to require a non-missing value as well. Without that guard an unanswered
* item satisfies ">4" and respondents with survey experience who skipped the
* frequency question would be counted as 4+; they now stay missing.
gen surv4plus = .
replace surv4plus = 1 if (Q217>4 & Q217<.) | (Q405>4 & Q405<.)
replace surv4plus = 0 if Q217<=4 | Q405<=4
replace surv4plus = 0 if survexp==0
replace surv4plus = . if Q215==. & Q403==.
tab surv4plus, m

* treatment: 1 if the treatment-block flow indicator equals 1, 0 if the
* control-block indicator equals 1 (control wins if both are set), else missing
generate treatment = cond(FL_4_DO_ControlBlock==1, 0, cond(FL_4_DO_TreatmentBlock==1, 1, .))
tabulate treatment, missing

* memory: value of whichever of Q237/Q425 is filled while the other one is
* system-missing (stays missing if both are filled); code 2 is then set to 0
generate memory = cond(Q237==., Q425, cond(Q425==., Q237, .))
replace memory = 0 if memory==2
tabulate memory, missing

* correct_treat: for rows with a value in D30, 1 if D30 equals Q241 or Q245 and
* 0 if it equals neither; missing when D30 is missing or when all four
* reproduction items (Q241, Q245, Q429, Q433) are missing
generate correct_treat = (D30==Q241 | D30==Q245) ///
    if D30!=. & !(Q241==. & Q245==. & Q429==. & Q433==.)
tabulate correct_treat, missing

* correct_contr: same comparison for D30_0 against Q429 and Q433
generate correct_contr = (D30_0==Q429 | D30_0==Q433) if D30_0!=.
tabulate correct_contr, missing

* consistent: 1 if either indicator equals 1, otherwise 0 (never missing)
generate consistent = (correct_treat==1 | correct_contr==1)
tabulate consistent, missing

** calculate time between first and second repetition

tabulate Duration, missing

* overall duration net of the time spent on the welcome page
* (Q434_ / Q435_ = timing of the welcome page)
generate duration_clean_treat = Duration - Q434_Page_Submit
tabulate duration_clean_treat, missing

generate duration_clean_contr = Duration - Q435_Page_Submit
tabulate duration_clean_contr, missing

* pick the version that belongs to the respondent's group
generate duration = cond(treatment==0, duration_clean_contr, cond(treatment==1, duration_clean_treat, .))
tabulate duration, missing

* subtract times of outliers (r's who started before told to do so and were then told to wait):
* a page time is removed from duration only when it exceeds the cutoff listed here;
* pages without a cutoff are tabulated for inspection only
local cut_Q221 5
local cut_Q222 90
local cut_Q224 90
local cut_Q342 5
local cut_Q343 90

foreach q in Q221 Q222 Q223 Q224 Q225 Q226 Q227 {
    tabulate `q'_Page_Submit, missing
    if "`cut_`q''" != "" {
        replace duration = duration - `q'_Page_Submit if `q'_Page_Submit > `cut_`q'' & `q'_Page_Submit != .
    }
}

foreach q in Q342 Q343 Q344 Q345 Q346 Q347 {
    tabulate `q'_Page_Submit, missing
    if "`cut_`q''" != "" {
        replace duration = duration - `q'_Page_Submit if `q'_Page_Submit > `cut_`q'' & `q'_Page_Submit != .
    }
}

* express the cleaned duration in minutes
generate minutes = duration/60
tabulate minutes, missing

* Time between the two repetitions: start from the cleaned -duration- and strip
* the page times that do not belong to that interval. Page times that exceeded
* the outlier cutoffs (Q221/Q342 > 5, Q222/Q224/Q343 > 90) were already removed
* from -duration- above, so they are only subtracted here when at or below the
* cutoff.
*
* The running total is only ever reduced, never reset to -duration-. A reset at
* an outlier step would throw away the subtractions made in the earlier steps,
* and a bare ">90" test would also be true for missing page times (Stata
* treats missing as larger than any number) and for the other group.
gen duration_betw_reps = duration

* subtract stepwise the durations of the first questions (treatment group)
replace duration_betw_reps = duration_betw_reps - Q221_Page_Submit if Q221_Page_Submit<=5  & treatment==1
replace duration_betw_reps = duration_betw_reps - Q222_Page_Submit if Q222_Page_Submit<=90 & treatment==1
replace duration_betw_reps = duration_betw_reps - Q223_Page_Submit if treatment==1
replace duration_betw_reps = duration_betw_reps - Q224_Page_Submit if Q224_Page_Submit<=90 & treatment==1

* subtract duration of answering memory question (treatment group)
replace duration_betw_reps = duration_betw_reps - Q337_Page_Submit - Q338_Page_Submit if Q338_Page_Submit!=. & treatment==1
replace duration_betw_reps = duration_betw_reps - Q337_Page_Submit - Q340_Page_Submit if Q340_Page_Submit!=. & treatment==1

* subtract stepwise the durations of the first questions (control group)
replace duration_betw_reps = duration_betw_reps - Q342_Page_Submit if Q342_Page_Submit<=5  & treatment==0
replace duration_betw_reps = duration_betw_reps - Q343_Page_Submit if Q343_Page_Submit<=90 & treatment==0
replace duration_betw_reps = duration_betw_reps - Q344_Page_Submit - Q345_Page_Submit if treatment==0

* subtract duration of answering memory question (control group)
replace duration_betw_reps = duration_betw_reps - Q429_Page_Submit - Q430_Page_Submit if Q430_Page_Submit!=. & treatment==0
replace duration_betw_reps = duration_betw_reps - Q429_Page_Submit - Q431_Page_Submit if Q431_Page_Submit!=. & treatment==0

* create minutes variable
gen minutes_betw_reps = duration_betw_reps/60
tab minutes_betw_reps, m

** create timing variables that combine the treatment and the control block

* page time of Q224 (treatment block) / Q345 (control block)
gen Seconds_Memory_Question=.
replace Seconds_Memory_Question=Q224_Page_Submit if FL_4_DO_TreatmentBlock==1
replace Seconds_Memory_Question=Q345_Page_Submit if FL_4_DO_ControlBlock==1
tab Seconds_Memory_Question, m

* page time of the reproduction question: the restate page (Q338/Q430) when the
* guess answer (Q245/Q433) is missing, the guess page (Q340/Q431) when the
* restate answer (Q241/Q429) is missing; later replacements win if both apply
gen Seconds_Reproduce_Previous=.
replace Seconds_Reproduce_Previous=Q338_Page_Submit if FL_4_DO_TreatmentBlock==1 & Q245==.
tab Seconds_Reproduce_Previous, m
replace Seconds_Reproduce_Previous=Q340_Page_Submit if FL_4_DO_TreatmentBlock==1 & Q241==.
tab Seconds_Reproduce_Previous, m
replace Seconds_Reproduce_Previous=Q430_Page_Submit if FL_4_DO_ControlBlock==1 & Q433==.
tab Seconds_Reproduce_Previous, m
replace Seconds_Reproduce_Previous=Q431_Page_Submit if FL_4_DO_ControlBlock==1 & Q429==.
tab Seconds_Reproduce_Previous, m

* The block indicators are compared with 1 explicitly in both lines of each
* pair below. A bare "& FL_4_DO_TreatmentBlock" is true for every non-zero
* value, including missing, so it would not restrict the statement to the
* treatment block if the indicator is missing for control respondents.
* NOTE(review): if each respondent only answers the items of one block, then
* (Q245==. | Q433==.) and (Q241==. | Q429==.) are true for everyone - confirm
* that the page-time variables alone are meant to separate restate from guess.

* for r's who remembered thus were asked to restate
gen Seconds_Reproduce_Previous_Resta = Q338_Page_Submit if (Q245==. | Q433==.) & FL_4_DO_TreatmentBlock==1
replace Seconds_Reproduce_Previous_Resta = Q430_Page_Submit if (Q245==. | Q433==.) & FL_4_DO_ControlBlock==1
tab Seconds_Reproduce_Previous_Resta, m

* for r's who didn't remember thus were asked to guess
gen Seconds_Reproduce_Previous_Guess = Q340_Page_Submit if (Q241==. | Q429==.) & FL_4_DO_TreatmentBlock==1
replace Seconds_Reproduce_Previous_Guess = Q431_Page_Submit if (Q241==. | Q429==.) & FL_4_DO_ControlBlock==1
tab Seconds_Reproduce_Previous_Guess, m


******************************************************
** 1) SECTION 3.4 (table 1)                         **
******************************************************

** descriptives for the complete sample

summarize female age highschool university survexp surv4plus

** treatment vs. control group: test of equal proportions / means plus effect size
* NOTE(review): the proportions inside the display lines are typed-in constants,
* presumably copied from the prtest output - re-check them if the data change.

prtest female, by(treatment)
display 2*asin(sqrt(0.6779)) - 2*asin(sqrt(0.625)) // Cohen's H

ttest age, by(treatment)
esize twosample age, by(treatment) cohensd // Cohen's D

prtest university, by(treatment)
display 2*asin(sqrt(0.375)) - 2*asin(sqrt(0.203)) // Cohen's H

prtest survexp, by(treatment)
display 2*asin(sqrt(0.8644)) - 2*asin(sqrt(0.8182)) // Cohen's H

prtest surv4plus, by(treatment)
display 2*asin(sqrt(0.4364)) - 2*asin(sqrt(0.4237)) // Cohen's H


******************************************************
** 2) SECTION 4.1.1                                 **
******************************************************

** time between repetitions (in minutes)

summarize minutes_betw_reps

** table 2. proportions of self-reported memory and correct reproduction of previous answers

tabulate memory, missing
tabulate consistent, missing


******************************************************
** 3) SECTION 4.1.2                                 **
******************************************************

** figure 2: share of correct reproductions within each decile of the time between repetitions

xtile decile_minutes_betw_reps = minutes_betw_reps, nquantiles(10)

forvalues d = 1/10 {
    tabulate consistent if decile_minutes_betw_reps==`d'
}

** correlation time passed & consistent answers
* pbis is not part of official Stata; presumably the user-written SSC package (ssc install pbis)

pbis consistent minutes_betw_reps


******************************************************
** 4) SECTION 4.1.3  (table 3)                      **
******************************************************

tabulate memory consistent, missing row


******************************************************
** 5) SECTION 4.1.4                                 **
******************************************************

** point-biserial correlations between correct reproduction and response times:
**   Seconds_Memory_Question          - test question when first presented
**   Seconds_Reproduce_Previous       - reproduction question (restate or guess)
**   Seconds_Reproduce_Previous_Resta - r's who indicated to remember and were asked to restate
**   Seconds_Reproduce_Previous_Guess - r's who did not and were asked to guess
** the last two check whether the relation holds in both groups
* pbis is not part of official Stata; presumably the user-written SSC package (ssc install pbis)

foreach timevar in Seconds_Memory_Question Seconds_Reproduce_Previous Seconds_Reproduce_Previous_Resta Seconds_Reproduce_Previous_Guess {
    pbis consistent `timevar'
}


******************************************************
** 6) SECTION 4.2.1                                 **
******************************************************

** table 4. self-reported memory and correct reproduction of the previous
** answer compared between treatment and control group
* NOTE(review): the proportions inside the display lines are typed-in constants,
* presumably copied from the prtest output - re-check them if the data change.

* self-reported memory

prtest memory, by(treatment)
display 2*asin(sqrt(0.75)) - 2*asin(sqrt(0.576)) // Cohen's H

* correct reproduction

prtest consistent, by(treatment)
display 2*asin(sqrt(0.625)) - 2*asin(sqrt(0.57627)) // Cohen's H

** difference in time interval between repetitions across groups

ttest minutes_betw_reps, by(treatment) // difference is .29 minutes = 17.3 seconds
esize twosample minutes_betw_reps, by(treatment) cohensd // Cohen's D

** table 5. logistic regressions (odds ratios) of self-reported memory and of
** correct reproduction on treatment, controlling for time, age and university

foreach outcome in memory consistent {
    logit `outcome' treatment minutes_betw_reps age university, or
}


******************************************************
** 7) SECTION 4.2.2                                 **
******************************************************

** table 6.
* NOTE(review): group sizes and proportions passed to prtesti/display are typed-in
* constants, presumably read off the tabulations - re-check them if the data change.

* correct reproduction among self-reported memory, treatment group first, then control
foreach grp in 1 0 {
    tabulate consistent if memory==1 & treatment==`grp'
}

* correct reproduction among self-reported memory: test and effect size
prtesti 34 .62 42 .69
display 2*asin(sqrt(0.69)) - 2*asin(sqrt(0.62)) // Cohen's H

* correct reproduction among self-reported no memory, treatment group first, then control
foreach grp in 1 0 {
    tabulate consistent if memory==0 & treatment==`grp'
}

* correct reproduction among self-reported no memory: test and effect size
prtesti 25 .52 14 .43
display 2*asin(sqrt(0.52)) - 2*asin(sqrt(0.43)) // Cohen's H

* memory effect
prtesti 34 .1 42 .26
display 2*asin(sqrt(0.26)) - 2*asin(sqrt(0.1)) // Cohen's H


******************************************************
** 8) SECTION 1                                     **
******************************************************

** count questions between repetitions

* remove all timing variables so that they are not counted as answers
drop *_First_Click *_Last_Click *_Page_Submit *_Click_Count

* number of non-missing MIT101-MIT116 entries per respondent (sentences judged),
* summarised for the treatment group
egen count_treatment = rownonmiss(MIT101-MIT116)
tabulate count_treatment, missing
summarize count_treatment if treatment==1

* replace every string variable by a numeric copy named <var>_n:
*   1. encode the strings into <var>_n
*   2. set code 1 to system-missing (code 1 is taken to stand for an empty cell)
*   3. drop the original string variable
* NOTE(review): confirm that blank cells really receive code 1; encode may leave
* truly empty strings missing, in which case code 1 would be a substantive answer.
foreach v of varlist Q97 Q101 Q161 Q165 Q169_1_0-Q171_11_0 Q173 Q175 Q199 Q205 Q289 Q285 Q349 Q353 Q357_1_0-Q363 Q387 Q393 {
    encode `v', generate(`v'_n)
    replace `v'_n = . if `v'_n == 1
    drop `v'
}

* collapse each check-all-that-apply question into one variable countAllQ<nr>:
* the row total over its items, missing if all items are missing

* questions whose items were string variables (now Q<nr>_1_0_n ... Q<nr>_11_0_n)
foreach q in 169 171 {
    egen countAllQ`q' = rowtotal(Q`q'_1_0_n-Q`q'_11_0_n), missing
}

* numeric questions; each pair is "<question number> <suffix of last item>"
foreach spec in "177 3" "179 9" "181 4" "183 13" "185 12" "187 12" "189 6" "207 7" {
    local q    : word 1 of `spec'
    local last : word 2 of `spec'
    egen countAllQ`q' = rowtotal(Q`q'_1-Q`q'_`last'), missing
}

foreach q in 357 359 {
    egen countAllQ`q' = rowtotal(Q`q'_1_0_n-Q`q'_11_0_n), missing
}

foreach spec in "365 3" "367 9" "369 4" "371 13" "373 12" "375 12" "377 6" "395 7" {
    local q    : word 1 of `spec'
    local last : word 2 of `spec'
    egen countAllQ`q' = rowtotal(Q`q'_1-Q`q'_`last'), missing
}

* remove the single items of the check-all-that-apply questions
foreach q in 169 171 177 179 181 183 185 187 189 207 357 359 365 367 369 371 373 375 377 395 {
    drop Q`q'_*
}

* arrange the variables so that everything to be counted forms one contiguous range
order _all, sequential
order Progress year1 year2 DistributionChannel-LocationLongitude, last
order countAllQ169-countAllQ395, first
order D28-D30_0, first // first repetition
order Q237 Q241 Q245 Q425 Q429 Q433, first // question about memory and second repetition

* number of non-missing entries per respondent in the range countAllQ169-Q440,
* then its mean across respondents
egen nonmis_all = rownonmiss(countAllQ169-Q440)
mean nonmis_all
